import os
import pandas as pd
import re

# 1. 加载 XLS 文件
xls_path = 'prov_city_list.xls'
xls_df = pd.read_excel(xls_path)
valid_usernames = set(xls_df['user_name'].astype(str).str.strip())

# 2. 遍历目标 CSV 文件夹
csv_dir = 'Data/汇总/汇总/'
csv_files = [f for f in os.listdir(csv_dir) if f.endswith('.csv')]

results = []
found_usernames = set()

for fname in csv_files:
    match = re.match(r"(.+)_\d{4}-\d{2}-\d{2}\.csv", fname)
    if match:
        username = match.group(1).strip()
        found_usernames.add(username)
        status = "匹配" if username in valid_usernames else "未匹配"
    else:
        username = "无法识别"
        status = "格式错误"

    results.append({
        "文件名": fname,
        "提取用户名": username,
        "匹配状态": status
    })

# 3. 差集计算
missing_users = valid_usernames - found_usernames
extra_users = found_usernames - valid_usernames

missing_df = pd.DataFrame({"缺失用户名（仅存在于XLS中）": sorted(missing_users)})
extra_df = pd.DataFrame({"多余用户名（仅存在于CSV中）": sorted(extra_users)})

# 4. 保存三份 Excel 报表
output_dir = 'Data/汇总/'
pd.DataFrame(results).to_excel(os.path.join(output_dir, "用户名匹配检查结果.xlsx"), index=False)
missing_df.to_excel(os.path.join(output_dir, "缺失用户名列表.xlsx"), index=False)
extra_df.to_excel(os.path.join(output_dir, "多余用户名列表.xlsx"), index=False)

print(f"[√] 检查完成，已生成以下文件：")
print(f"    - 用户名匹配检查结果.xlsx")
print(f"    - 缺失用户名列表.xlsx")
print(f"    - 多余用户名列表.xlsx")